circuit ALUexe :
  module ScalarALU :
    ; ScalarALU: combinational 32-bit ALU. `clock` and `reset` are declared but
    ; never referenced in this module body; every statement below is a node or
    ; a connect to an output port.
    ;
    ; Ports (io):
    ;   func    : 5-bit operation select
    ;   in1,in2 : 32-bit operands
    ;   in3     : 32-bit input, not referenced anywhere in this module
    ;   out     : 32-bit result
    ;   cmp_out : 1-bit comparison result
    ;
    ; Result selection on io.out, as established by the muxes below:
    ;   func == 0x0 or 0xa   adder_out (in1 + in2 for 0x0, in1 - in2 for 0xa)
    ;   func == 0x1          in1 shifted left by in2[4:0]
    ;   func == 0x5          in1 shifted right, zero fill
    ;   func == 0xb          in1 shifted right, sign fill
    ;   func == 0x4/0x6/0x7  xor / or / and of in1 and in2
    ;   func == 0x8          in2 passed through
    ;   func in 0xc..0xf     the 1-bit `slt` flag, zero-extended
    ;   func in 0x10..0x13   min/max result (`minmaxout`)
    ;   any other func       0 (every contributing term evaluates to zero)
    input clock : Clock
    input reset : Reset
    output io : { flip func : UInt<5>, flip in2 : UInt<32>, flip in1 : UInt<32>, flip in3 : UInt<32>, out : UInt<32>, cmp_out : UInt<1>}

    ; ---- Adder / subtractor -------------------------------------------------
    ; "Subtract" condition: 0xa <= func <= 0xf. When it holds, in2 is bitwise
    ; inverted and a carry-in of 1 is added, i.e. in1 + ~in2 + 1 = in1 - in2.
    node _in2_inv_T = geq(io.func, UInt<5>("ha")) @[ALU.scala 56:31]
    node _in2_inv_T_1 = leq(io.func, UInt<5>("hf")) @[ALU.scala 56:49]
    node _in2_inv_T_2 = and(_in2_inv_T, _in2_inv_T_1) @[ALU.scala 56:42]
    node _in2_inv_T_3 = not(io.in2) @[ALU.scala 80:38]
    node in2_inv = mux(_in2_inv_T_2, _in2_inv_T_3, io.in2) @[ALU.scala 80:20]
    ; Each add grows the width by one bit; tail(…, 1) drops that carry bit so
    ; the sum wraps at 32 bits.
    node _adder_out_T = add(io.in1, in2_inv) @[ALU.scala 81:26]
    node _adder_out_T_1 = tail(_adder_out_T, 1) @[ALU.scala 81:26]
    ; Same 0xa..0xf range test again, used here as the 1-bit carry-in.
    node _adder_out_T_2 = geq(io.func, UInt<5>("ha")) @[ALU.scala 56:31]
    node _adder_out_T_3 = leq(io.func, UInt<5>("hf")) @[ALU.scala 56:49]
    node _adder_out_T_4 = and(_adder_out_T_2, _adder_out_T_3) @[ALU.scala 56:42]
    node _adder_out_T_5 = add(_adder_out_T_1, _adder_out_T_4) @[ALU.scala 81:36]
    node adder_out = tail(_adder_out_T_5, 1) @[ALU.scala 81:36]
    ; XOR against the (possibly inverted) second operand; only its
    ; "all zero" test is used below, for the equality compare.
    node in1_xor_in2 = xor(io.in1, in2_inv) @[ALU.scala 82:28]
    ; ---- Less-than flag -----------------------------------------------------
    ; If the operands' top bits agree, the flag is the top bit of adder_out.
    ; If they differ, func[1] picks in2[31] (func[1]=1) or in1[31] (func[1]=0).
    node _slt_T = bits(io.in1, 31, 31) @[ALU.scala 86:15]
    node _slt_T_1 = bits(io.in2, 31, 31) @[ALU.scala 86:34]
    node _slt_T_2 = eq(_slt_T, _slt_T_1) @[ALU.scala 86:24]
    node _slt_T_3 = bits(adder_out, 31, 31) @[ALU.scala 86:53]
    node _slt_T_4 = bits(io.func, 1, 1) @[ALU.scala 58:35]
    node _slt_T_5 = bits(io.in2, 31, 31) @[ALU.scala 87:39]
    node _slt_T_6 = bits(io.in1, 31, 31) @[ALU.scala 87:55]
    node _slt_T_7 = mux(_slt_T_4, _slt_T_5, _slt_T_6) @[ALU.scala 87:10]
    node slt = mux(_slt_T_2, _slt_T_3, _slt_T_7) @[ALU.scala 86:8]
    ; ---- cmp_out ------------------------------------------------------------
    ; cmp_out = func[0] XOR (func[3] == 0 ? (in1_xor_in2 == 0) : slt).
    ; func[0] therefore inverts the selected comparison. With func[3] == 0 the
    ; func value lies outside 0xa..0xf, so in2_inv is plain in2 and the zero
    ; test is an equality check of in1 and in2.
    node _io_cmp_out_T = bits(io.func, 0, 0) @[ALU.scala 59:35]
    node _io_cmp_out_T_1 = bits(io.func, 3, 3) @[ALU.scala 60:30]
    node _io_cmp_out_T_2 = eq(_io_cmp_out_T_1, UInt<1>("h0")) @[ALU.scala 60:26]
    node _io_cmp_out_T_3 = eq(in1_xor_in2, UInt<32>("h0")) @[ALU.scala 88:72]
    node _io_cmp_out_T_4 = mux(_io_cmp_out_T_2, _io_cmp_out_T_3, slt) @[ALU.scala 88:43]
    node _io_cmp_out_T_5 = xor(_io_cmp_out_T, _io_cmp_out_T_4) @[ALU.scala 88:38]
    io.cmp_out <= _io_cmp_out_T_5 @[ALU.scala 88:14]
    ; ---- Shifter ------------------------------------------------------------
    ; A single right shifter serves all three shifts. For a left shift the
    ; input is bit-reversed first and the shifted result is bit-reversed back.
    ; The shift amount is always the low 5 bits of the un-inverted in2.
    node shamt = bits(io.in2, 4, 0) @[ALU.scala 91:32]
    ; Right-shift operations (func 0x5 or 0xb) feed in1 straight through.
    node _shin_T = eq(io.func, UInt<5>("h5")) @[ALU.scala 92:26]
    node _shin_T_1 = eq(io.func, UInt<5>("hb")) @[ALU.scala 92:47]
    node _shin_T_2 = or(_shin_T, _shin_T_1) @[ALU.scala 92:36]
    ; Bit reversal of in1 in five mask-and-swap stages.
    ; Stage 1: mask = 0x0000ffff, swap the two 16-bit halves.
    node _shin_T_3 = shl(UInt<16>("hffff"), 16) @[Bitwise.scala 104:52]
    node _shin_T_4 = xor(UInt<32>("hffffffff"), _shin_T_3) @[Bitwise.scala 104:21]
    node _shin_T_5 = shr(io.in1, 16) @[Bitwise.scala 105:21]
    node _shin_T_6 = and(_shin_T_5, _shin_T_4) @[Bitwise.scala 105:31]
    node _shin_T_7 = bits(io.in1, 15, 0) @[Bitwise.scala 105:46]
    node _shin_T_8 = shl(_shin_T_7, 16) @[Bitwise.scala 105:70]
    node _shin_T_9 = not(_shin_T_4) @[Bitwise.scala 105:82]
    node _shin_T_10 = and(_shin_T_8, _shin_T_9) @[Bitwise.scala 105:80]
    node _shin_T_11 = or(_shin_T_6, _shin_T_10) @[Bitwise.scala 105:39]
    ; Stage 2: mask = previous mask XOR (previous mask << 8) = 0x00ff00ff,
    ; swap adjacent bytes.
    node _shin_T_12 = bits(_shin_T_4, 23, 0) @[Bitwise.scala 104:28]
    node _shin_T_13 = shl(_shin_T_12, 8) @[Bitwise.scala 104:52]
    node _shin_T_14 = xor(_shin_T_4, _shin_T_13) @[Bitwise.scala 104:21]
    node _shin_T_15 = shr(_shin_T_11, 8) @[Bitwise.scala 105:21]
    node _shin_T_16 = and(_shin_T_15, _shin_T_14) @[Bitwise.scala 105:31]
    node _shin_T_17 = bits(_shin_T_11, 23, 0) @[Bitwise.scala 105:46]
    node _shin_T_18 = shl(_shin_T_17, 8) @[Bitwise.scala 105:70]
    node _shin_T_19 = not(_shin_T_14) @[Bitwise.scala 105:82]
    node _shin_T_20 = and(_shin_T_18, _shin_T_19) @[Bitwise.scala 105:80]
    node _shin_T_21 = or(_shin_T_16, _shin_T_20) @[Bitwise.scala 105:39]
    ; Stage 3: swap adjacent 4-bit groups.
    node _shin_T_22 = bits(_shin_T_14, 27, 0) @[Bitwise.scala 104:28]
    node _shin_T_23 = shl(_shin_T_22, 4) @[Bitwise.scala 104:52]
    node _shin_T_24 = xor(_shin_T_14, _shin_T_23) @[Bitwise.scala 104:21]
    node _shin_T_25 = shr(_shin_T_21, 4) @[Bitwise.scala 105:21]
    node _shin_T_26 = and(_shin_T_25, _shin_T_24) @[Bitwise.scala 105:31]
    node _shin_T_27 = bits(_shin_T_21, 27, 0) @[Bitwise.scala 105:46]
    node _shin_T_28 = shl(_shin_T_27, 4) @[Bitwise.scala 105:70]
    node _shin_T_29 = not(_shin_T_24) @[Bitwise.scala 105:82]
    node _shin_T_30 = and(_shin_T_28, _shin_T_29) @[Bitwise.scala 105:80]
    node _shin_T_31 = or(_shin_T_26, _shin_T_30) @[Bitwise.scala 105:39]
    ; Stage 4: swap adjacent 2-bit groups.
    node _shin_T_32 = bits(_shin_T_24, 29, 0) @[Bitwise.scala 104:28]
    node _shin_T_33 = shl(_shin_T_32, 2) @[Bitwise.scala 104:52]
    node _shin_T_34 = xor(_shin_T_24, _shin_T_33) @[Bitwise.scala 104:21]
    node _shin_T_35 = shr(_shin_T_31, 2) @[Bitwise.scala 105:21]
    node _shin_T_36 = and(_shin_T_35, _shin_T_34) @[Bitwise.scala 105:31]
    node _shin_T_37 = bits(_shin_T_31, 29, 0) @[Bitwise.scala 105:46]
    node _shin_T_38 = shl(_shin_T_37, 2) @[Bitwise.scala 105:70]
    node _shin_T_39 = not(_shin_T_34) @[Bitwise.scala 105:82]
    node _shin_T_40 = and(_shin_T_38, _shin_T_39) @[Bitwise.scala 105:80]
    node _shin_T_41 = or(_shin_T_36, _shin_T_40) @[Bitwise.scala 105:39]
    ; Stage 5: swap adjacent single bits.
    node _shin_T_42 = bits(_shin_T_34, 30, 0) @[Bitwise.scala 104:28]
    node _shin_T_43 = shl(_shin_T_42, 1) @[Bitwise.scala 104:52]
    node _shin_T_44 = xor(_shin_T_34, _shin_T_43) @[Bitwise.scala 104:21]
    node _shin_T_45 = shr(_shin_T_41, 1) @[Bitwise.scala 105:21]
    node _shin_T_46 = and(_shin_T_45, _shin_T_44) @[Bitwise.scala 105:31]
    node _shin_T_47 = bits(_shin_T_41, 30, 0) @[Bitwise.scala 105:46]
    node _shin_T_48 = shl(_shin_T_47, 1) @[Bitwise.scala 105:70]
    node _shin_T_49 = not(_shin_T_44) @[Bitwise.scala 105:82]
    node _shin_T_50 = and(_shin_T_48, _shin_T_49) @[Bitwise.scala 105:80]
    node _shin_T_51 = or(_shin_T_46, _shin_T_50) @[Bitwise.scala 105:39]
    ; Shifter input: in1 for right shifts, bit-reversed in1 otherwise.
    node shin = mux(_shin_T_2, io.in1, _shin_T_51) @[ALU.scala 92:17]
    ; The fill bit is shin[31] only when func is in 0xa..0xf; of the shift
    ; functions only 0xb falls in that range. Prepending the fill bit and
    ; shifting as a signed 33-bit value yields sign fill for 0xb and zero
    ; fill for every other func.
    node _shout_r_T = geq(io.func, UInt<5>("ha")) @[ALU.scala 56:31]
    node _shout_r_T_1 = leq(io.func, UInt<5>("hf")) @[ALU.scala 56:49]
    node _shout_r_T_2 = and(_shout_r_T, _shout_r_T_1) @[ALU.scala 56:42]
    node _shout_r_T_3 = bits(shin, 31, 31) @[ALU.scala 93:41]
    node _shout_r_T_4 = and(_shout_r_T_2, _shout_r_T_3) @[ALU.scala 93:36]
    node _shout_r_T_5 = cat(_shout_r_T_4, shin) @[Cat.scala 31:58]
    node _shout_r_T_6 = asSInt(_shout_r_T_5) @[ALU.scala 93:57]
    node _shout_r_T_7 = dshr(_shout_r_T_6, shamt) @[ALU.scala 93:64]
    node shout_r = bits(_shout_r_T_7, 31, 0) @[ALU.scala 93:73]
    ; Bit-reverse shout_r with the same five-stage swap network (16, 8, 4, 2,
    ; 1) to produce the left-shift result.
    node _shout_l_T = shl(UInt<16>("hffff"), 16) @[Bitwise.scala 104:52]
    node _shout_l_T_1 = xor(UInt<32>("hffffffff"), _shout_l_T) @[Bitwise.scala 104:21]
    node _shout_l_T_2 = shr(shout_r, 16) @[Bitwise.scala 105:21]
    node _shout_l_T_3 = and(_shout_l_T_2, _shout_l_T_1) @[Bitwise.scala 105:31]
    node _shout_l_T_4 = bits(shout_r, 15, 0) @[Bitwise.scala 105:46]
    node _shout_l_T_5 = shl(_shout_l_T_4, 16) @[Bitwise.scala 105:70]
    node _shout_l_T_6 = not(_shout_l_T_1) @[Bitwise.scala 105:82]
    node _shout_l_T_7 = and(_shout_l_T_5, _shout_l_T_6) @[Bitwise.scala 105:80]
    node _shout_l_T_8 = or(_shout_l_T_3, _shout_l_T_7) @[Bitwise.scala 105:39]
    node _shout_l_T_9 = bits(_shout_l_T_1, 23, 0) @[Bitwise.scala 104:28]
    node _shout_l_T_10 = shl(_shout_l_T_9, 8) @[Bitwise.scala 104:52]
    node _shout_l_T_11 = xor(_shout_l_T_1, _shout_l_T_10) @[Bitwise.scala 104:21]
    node _shout_l_T_12 = shr(_shout_l_T_8, 8) @[Bitwise.scala 105:21]
    node _shout_l_T_13 = and(_shout_l_T_12, _shout_l_T_11) @[Bitwise.scala 105:31]
    node _shout_l_T_14 = bits(_shout_l_T_8, 23, 0) @[Bitwise.scala 105:46]
    node _shout_l_T_15 = shl(_shout_l_T_14, 8) @[Bitwise.scala 105:70]
    node _shout_l_T_16 = not(_shout_l_T_11) @[Bitwise.scala 105:82]
    node _shout_l_T_17 = and(_shout_l_T_15, _shout_l_T_16) @[Bitwise.scala 105:80]
    node _shout_l_T_18 = or(_shout_l_T_13, _shout_l_T_17) @[Bitwise.scala 105:39]
    node _shout_l_T_19 = bits(_shout_l_T_11, 27, 0) @[Bitwise.scala 104:28]
    node _shout_l_T_20 = shl(_shout_l_T_19, 4) @[Bitwise.scala 104:52]
    node _shout_l_T_21 = xor(_shout_l_T_11, _shout_l_T_20) @[Bitwise.scala 104:21]
    node _shout_l_T_22 = shr(_shout_l_T_18, 4) @[Bitwise.scala 105:21]
    node _shout_l_T_23 = and(_shout_l_T_22, _shout_l_T_21) @[Bitwise.scala 105:31]
    node _shout_l_T_24 = bits(_shout_l_T_18, 27, 0) @[Bitwise.scala 105:46]
    node _shout_l_T_25 = shl(_shout_l_T_24, 4) @[Bitwise.scala 105:70]
    node _shout_l_T_26 = not(_shout_l_T_21) @[Bitwise.scala 105:82]
    node _shout_l_T_27 = and(_shout_l_T_25, _shout_l_T_26) @[Bitwise.scala 105:80]
    node _shout_l_T_28 = or(_shout_l_T_23, _shout_l_T_27) @[Bitwise.scala 105:39]
    node _shout_l_T_29 = bits(_shout_l_T_21, 29, 0) @[Bitwise.scala 104:28]
    node _shout_l_T_30 = shl(_shout_l_T_29, 2) @[Bitwise.scala 104:52]
    node _shout_l_T_31 = xor(_shout_l_T_21, _shout_l_T_30) @[Bitwise.scala 104:21]
    node _shout_l_T_32 = shr(_shout_l_T_28, 2) @[Bitwise.scala 105:21]
    node _shout_l_T_33 = and(_shout_l_T_32, _shout_l_T_31) @[Bitwise.scala 105:31]
    node _shout_l_T_34 = bits(_shout_l_T_28, 29, 0) @[Bitwise.scala 105:46]
    node _shout_l_T_35 = shl(_shout_l_T_34, 2) @[Bitwise.scala 105:70]
    node _shout_l_T_36 = not(_shout_l_T_31) @[Bitwise.scala 105:82]
    node _shout_l_T_37 = and(_shout_l_T_35, _shout_l_T_36) @[Bitwise.scala 105:80]
    node _shout_l_T_38 = or(_shout_l_T_33, _shout_l_T_37) @[Bitwise.scala 105:39]
    node _shout_l_T_39 = bits(_shout_l_T_31, 30, 0) @[Bitwise.scala 104:28]
    node _shout_l_T_40 = shl(_shout_l_T_39, 1) @[Bitwise.scala 104:52]
    node _shout_l_T_41 = xor(_shout_l_T_31, _shout_l_T_40) @[Bitwise.scala 104:21]
    node _shout_l_T_42 = shr(_shout_l_T_38, 1) @[Bitwise.scala 105:21]
    node _shout_l_T_43 = and(_shout_l_T_42, _shout_l_T_41) @[Bitwise.scala 105:31]
    node _shout_l_T_44 = bits(_shout_l_T_38, 30, 0) @[Bitwise.scala 105:46]
    node _shout_l_T_45 = shl(_shout_l_T_44, 1) @[Bitwise.scala 105:70]
    node _shout_l_T_46 = not(_shout_l_T_41) @[Bitwise.scala 105:82]
    node _shout_l_T_47 = and(_shout_l_T_45, _shout_l_T_46) @[Bitwise.scala 105:80]
    node shout_l = or(_shout_l_T_43, _shout_l_T_47) @[Bitwise.scala 105:39]
    ; Shift result: shout_r for func 0x5/0xb, shout_l for func 0x1, else 0.
    ; The two terms are mutually exclusive, so OR-ing them acts as a select.
    node _shout_T = eq(io.func, UInt<5>("h5")) @[ALU.scala 95:27]
    node _shout_T_1 = eq(io.func, UInt<5>("hb")) @[ALU.scala 95:48]
    node _shout_T_2 = or(_shout_T, _shout_T_1) @[ALU.scala 95:37]
    node _shout_T_3 = mux(_shout_T_2, shout_r, UInt<32>("h0")) @[ALU.scala 95:18]
    node _shout_T_4 = eq(io.func, UInt<5>("h1")) @[ALU.scala 96:17]
    node _shout_T_5 = mux(_shout_T_4, shout_l, UInt<32>("h0")) @[ALU.scala 96:8]
    node shout = or(_shout_T_3, _shout_T_5) @[ALU.scala 95:82]
    ; ---- Bitwise logic ------------------------------------------------------
    ; func 0x4 -> in1 ^ in2, 0x6 -> in1 | in2, 0x7 -> in1 & in2, else 0.
    ; These use io.in2 directly, not in2_inv.
    node _logic_T = eq(io.func, UInt<5>("h4")) @[ALU.scala 99:27]
    node _logic_T_1 = xor(io.in1, io.in2) @[ALU.scala 99:46]
    node _logic_T_2 = eq(io.func, UInt<5>("h6")) @[ALU.scala 100:17]
    node _logic_T_3 = or(io.in1, io.in2) @[ALU.scala 100:35]
    node _logic_T_4 = eq(io.func, UInt<5>("h7")) @[ALU.scala 101:19]
    node _logic_T_5 = and(io.in1, io.in2) @[ALU.scala 101:38]
    node _logic_T_6 = mux(_logic_T_4, _logic_T_5, UInt<32>("h0")) @[ALU.scala 101:10]
    node _logic_T_7 = mux(_logic_T_2, _logic_T_3, _logic_T_6) @[ALU.scala 100:8]
    node logic = mux(_logic_T, _logic_T_1, _logic_T_7) @[ALU.scala 99:18]
    ; ---- Merge compare / logic / shift -------------------------------------
    ; "Compare" condition: 0xc <= func <= 0xf. The 1-bit (compare & slt) term
    ; is OR-ed with the 32-bit logic and shift results; for any given func at
    ; most one of the three terms is non-zero.
    node _shift_logic_cmp_T = geq(io.func, UInt<5>("hc")) @[ALU.scala 57:31]
    node _shift_logic_cmp_T_1 = leq(io.func, UInt<5>("hf")) @[ALU.scala 57:49]
    node _shift_logic_cmp_T_2 = and(_shift_logic_cmp_T, _shift_logic_cmp_T_1) @[ALU.scala 57:42]
    node _shift_logic_cmp_T_3 = and(_shift_logic_cmp_T_2, slt) @[ALU.scala 103:40]
    node _shift_logic_cmp_T_4 = or(_shift_logic_cmp_T_3, logic) @[ALU.scala 103:47]
    node shift_logic_cmp = or(_shift_logic_cmp_T_4, shout) @[ALU.scala 103:55]
    ; func 0x0 or 0xa selects the adder; every other func takes the merged
    ; compare/logic/shift value.
    node _out_T = eq(io.func, UInt<5>("h0")) @[ALU.scala 104:25]
    node _out_T_1 = eq(io.func, UInt<5>("ha")) @[ALU.scala 104:47]
    node _out_T_2 = or(_out_T, _out_T_1) @[ALU.scala 104:36]
    node out = mux(_out_T_2, adder_out, shift_logic_cmp) @[ALU.scala 104:16]
    ; ---- Min / max ----------------------------------------------------------
    ; Unsigned min/max compare the raw UInt operands.
    node _minu_T = gt(io.in1, io.in2) @[ALU.scala 107:22]
    node minu = mux(_minu_T, io.in2, io.in1) @[ALU.scala 107:15]
    node _maxu_T = gt(io.in1, io.in2) @[ALU.scala 108:22]
    node maxu = mux(_maxu_T, io.in1, io.in2) @[ALU.scala 108:15]
    ; Signed min/max reinterpret the same bits as SInt before comparing and
    ; cast the winner back to UInt.
    node in1s = asSInt(io.in1) @[ALU.scala 109:25]
    node in2s = asSInt(io.in2) @[ALU.scala 110:25]
    node _mins_T = gt(in1s, in2s) @[ALU.scala 111:20]
    node _mins_T_1 = mux(_mins_T, in2s, in1s) @[ALU.scala 111:15]
    node mins = asUInt(_mins_T_1) @[ALU.scala 111:37]
    node _maxs_T = gt(in1s, in2s) @[ALU.scala 112:20]
    node _maxs_T_1 = mux(_maxs_T, in1s, in2s) @[ALU.scala 112:15]
    node maxs = asUInt(_maxs_T_1) @[ALU.scala 112:37]
    ; Priority select: 0x11 -> mins, 0x10 -> maxs, 0x13 -> minu, otherwise maxu.
    ; Since minmaxout is only used when func[4:2] == 4 (see below), the
    ; "otherwise" case corresponds to func 0x12.
    node _minmaxout_T = eq(io.func, UInt<5>("h11")) @[ALU.scala 113:30]
    node _minmaxout_T_1 = eq(io.func, UInt<5>("h10")) @[ALU.scala 114:30]
    node _minmaxout_T_2 = eq(io.func, UInt<5>("h13")) @[ALU.scala 115:30]
    node _minmaxout_T_3 = mux(_minmaxout_T_2, minu, maxu) @[ALU.scala 115:22]
    node _minmaxout_T_4 = mux(_minmaxout_T_1, maxs, _minmaxout_T_3) @[ALU.scala 114:22]
    node minmaxout = mux(_minmaxout_T, mins, _minmaxout_T_4) @[ALU.scala 113:22]
    ; ---- Final output mux ---------------------------------------------------
    ; func == 0x8 passes in2 through; func[4:2] == 4 (func 0x10..0x13) selects
    ; the min/max result; everything else takes `out`.
    node _io_out_T = eq(io.func, UInt<5>("h8")) @[ALU.scala 117:24]
    node _io_out_T_1 = bits(io.func, 4, 2) @[ALU.scala 61:27]
    node _io_out_T_2 = eq(_io_out_T_1, UInt<3>("h4")) @[ALU.scala 61:32]
    node _io_out_T_3 = mux(_io_out_T_2, minmaxout, out) @[ALU.scala 118:16]
    node _io_out_T_4 = mux(_io_out_T, io.in2, _io_out_T_3) @[ALU.scala 117:16]
    io.out <= _io_out_T_4 @[ALU.scala 117:10]

  module Queue :
    ; Single-entry ready/valid queue for the write-back payload
    ; { wb_wxd_rd, wxd, reg_idxw, warp_id }.
    ;   io.enq   : producer side (valid/bits in, ready out)
    ;   io.deq   : consumer side (valid/bits out, ready in)
    ;   io.count : 1 when the single slot is occupied, else 0
    ; io.enq.ready is also raised whenever io.deq.ready is high, so the slot
    ; can be refilled in the same cycle it is drained.
    input clock : Clock
    input reset : Reset
    output io : { flip enq : { flip ready : UInt<1>, valid : UInt<1>, bits : { wb_wxd_rd : UInt<32>, wxd : UInt<1>, reg_idxw : UInt<5>, warp_id : UInt<2>}}, deq : { flip ready : UInt<1>, valid : UInt<1>, bits : { wb_wxd_rd : UInt<32>, wxd : UInt<1>, reg_idxw : UInt<5>, warp_id : UInt<2>}}, count : UInt<1>}

    ; Storage: one payload slot plus an occupancy flag that resets to 0.
    cmem ram : { wb_wxd_rd : UInt<32>, wxd : UInt<1>, reg_idxw : UInt<5>, warp_id : UInt<2>} [1] @[Decoupled.scala 259:95]
    reg maybe_full : UInt<1>, clock with :
      reset => (reset, UInt<1>("h0")) @[Decoupled.scala 262:27]

    ; With a single slot both pointers are the constant 0.
    node enq_ptr_value = UInt<1>("h0")
    node deq_ptr_value = UInt<1>("h0")
    node ptr_match = eq(enq_ptr_value, deq_ptr_value) @[Decoupled.scala 263:33]
    node full = and(ptr_match, maybe_full) @[Decoupled.scala 265:24]
    node empty = and(ptr_match, not(maybe_full)) @[Decoupled.scala 264:25]

    ; Handshake outputs.
    io.deq.valid <= not(empty) @[Decoupled.scala 288:16]
    io.enq.ready <= or(io.deq.ready, not(full)) @[Decoupled.scala 309:39]

    ; A transfer happens on a side when its ready and valid are both high.
    node do_enq = and(io.enq.ready, io.enq.valid) @[Decoupled.scala 50:35]
    node do_deq = and(io.deq.ready, io.deq.valid) @[Decoupled.scala 50:35]

    ; Write the slot on enqueue.
    when do_enq : @[Decoupled.scala 272:16]
      infer mport MPORT = ram[UInt<1>("h0")], clock @[Decoupled.scala 273:8]
      MPORT <= io.enq.bits @[Decoupled.scala 273:24]

    ; Occupancy only changes when exactly one of enqueue/dequeue fires;
    ; it then takes the value of do_enq.
    node occupancy_changes = xor(do_enq, do_deq) @[Decoupled.scala 279:15]
    maybe_full <= mux(occupancy_changes, do_enq, maybe_full) @[Decoupled.scala 280:16]

    ; The dequeue payload is always read from slot 0.
    infer mport io_deq_bits_MPORT = ram[UInt<1>("h0")], clock @[Decoupled.scala 296:23]
    io.deq.bits <= io_deq_bits_MPORT @[Decoupled.scala 296:17]

    ; Occupancy count: `full` OR-ed with the (truncated) pointer difference.
    node ptr_diff = tail(sub(enq_ptr_value, deq_ptr_value), 1) @[Decoupled.scala 312:32]
    io.count <= or(full, ptr_diff) @[Decoupled.scala 315:14]

  module Queue_1 :
    ; Single-entry ready/valid queue for the branch payload
    ; { wid, jump, new_pc }.
    ;   io.enq   : producer side (valid/bits in, ready out)
    ;   io.deq   : consumer side (valid/bits out, ready in)
    ;   io.count : 1 when the single slot is occupied, else 0
    ; io.enq.ready is also raised whenever io.deq.ready is high, so the slot
    ; can be refilled in the same cycle it is drained.
    input clock : Clock
    input reset : Reset
    output io : { flip enq : { flip ready : UInt<1>, valid : UInt<1>, bits : { wid : UInt<2>, jump : UInt<1>, new_pc : UInt<32>}}, deq : { flip ready : UInt<1>, valid : UInt<1>, bits : { wid : UInt<2>, jump : UInt<1>, new_pc : UInt<32>}}, count : UInt<1>}

    ; Storage: one payload slot plus an occupancy flag that resets to 0.
    cmem ram : { wid : UInt<2>, jump : UInt<1>, new_pc : UInt<32>} [1] @[Decoupled.scala 259:95]
    reg maybe_full : UInt<1>, clock with :
      reset => (reset, UInt<1>("h0")) @[Decoupled.scala 262:27]

    ; With a single slot both pointers are the constant 0.
    node enq_ptr_value = UInt<1>("h0")
    node deq_ptr_value = UInt<1>("h0")
    node ptr_match = eq(enq_ptr_value, deq_ptr_value) @[Decoupled.scala 263:33]
    node full = and(ptr_match, maybe_full) @[Decoupled.scala 265:24]
    node empty = and(ptr_match, not(maybe_full)) @[Decoupled.scala 264:25]

    ; Handshake outputs.
    io.deq.valid <= not(empty) @[Decoupled.scala 288:16]
    io.enq.ready <= or(io.deq.ready, not(full)) @[Decoupled.scala 309:39]

    ; A transfer happens on a side when its ready and valid are both high.
    node do_enq = and(io.enq.ready, io.enq.valid) @[Decoupled.scala 50:35]
    node do_deq = and(io.deq.ready, io.deq.valid) @[Decoupled.scala 50:35]

    ; Write the slot on enqueue.
    when do_enq : @[Decoupled.scala 272:16]
      infer mport MPORT = ram[UInt<1>("h0")], clock @[Decoupled.scala 273:8]
      MPORT <= io.enq.bits @[Decoupled.scala 273:24]

    ; Occupancy only changes when exactly one of enqueue/dequeue fires;
    ; it then takes the value of do_enq.
    node occupancy_changes = xor(do_enq, do_deq) @[Decoupled.scala 279:15]
    maybe_full <= mux(occupancy_changes, do_enq, maybe_full) @[Decoupled.scala 280:16]

    ; The dequeue payload is always read from slot 0.
    infer mport io_deq_bits_MPORT = ram[UInt<1>("h0")], clock @[Decoupled.scala 296:23]
    io.deq.bits <= io_deq_bits_MPORT @[Decoupled.scala 296:17]

    ; Occupancy count: `full` OR-ed with the (truncated) pointer difference.
    node ptr_diff = tail(sub(enq_ptr_value, deq_ptr_value), 1) @[Decoupled.scala 312:32]
    io.count <= or(full, ptr_diff) @[Decoupled.scala 315:14]

  module ALUexe :
    ; ALUexe: wraps ScalarALU with two single-entry output queues.
    ;   io.in     : ready/valid input carrying operands in1/in2/in3 and ctrl
    ;   io.out    : dequeue side of `result`    (wb_wxd_rd, wxd, reg_idxw, warp_id)
    ;   io.out2br : dequeue side of `result_br` (wid, jump, new_pc)
    ; The ALU result is enqueued into `result` when ctrl.wxd is set. A branch
    ; record is enqueued into `result_br` when ctrl.branch != 0.
    ;
    ; NOTE: the source locators indicate this file is generated from
    ; execution.scala. The enqueue-valid gating added near the end of this
    ; module has to be mirrored in that generator source, otherwise it is lost
    ; when the file is regenerated.
    input clock : Clock
    input reset : UInt<1>
    output io : { flip in : { flip ready : UInt<1>, valid : UInt<1>, bits : { in1 : UInt<32>, in2 : UInt<32>, in3 : UInt<32>, ctrl : { inst : UInt<32>, wid : UInt<2>, fp : UInt<1>, branch : UInt<2>, simt_stack : UInt<1>, simt_stack_op : UInt<1>, barrier : UInt<1>, csr : UInt<2>, reverse : UInt<1>, sel_alu2 : UInt<2>, sel_alu1 : UInt<2>, isvec : UInt<1>, sel_alu3 : UInt<2>, mask : UInt<1>, sel_imm : UInt<3>, mem_whb : UInt<2>, mem_unsigned : UInt<1>, alu_fn : UInt<6>, mem : UInt<1>, mul : UInt<1>, mem_cmd : UInt<2>, mop : UInt<2>, reg_idx1 : UInt<5>, reg_idx2 : UInt<5>, reg_idx3 : UInt<5>, reg_idxw : UInt<5>, wfd : UInt<1>, fence : UInt<1>, sfu : UInt<1>, readmask : UInt<1>, writemask : UInt<1>, wxd : UInt<1>, pc : UInt<32>}}}, out : { flip ready : UInt<1>, valid : UInt<1>, bits : { wb_wxd_rd : UInt<32>, wxd : UInt<1>, reg_idxw : UInt<5>, warp_id : UInt<2>}}, out2br : { flip ready : UInt<1>, valid : UInt<1>, bits : { wid : UInt<2>, jump : UInt<1>, new_pc : UInt<32>}}}

    ; ALU instance: operands come straight from the input bundle; the function
    ; select is the low 5 bits of the 6-bit ctrl.alu_fn.
    inst alu of ScalarALU @[execution.scala 29:17]
    alu.clock <= clock
    alu.reset <= reset
    alu.io.in1 <= io.in.bits.in1 @[execution.scala 30:13]
    alu.io.in2 <= io.in.bits.in2 @[execution.scala 31:13]
    alu.io.in3 <= io.in.bits.in3 @[execution.scala 32:13]
    node _alu_io_func_T = bits(io.in.bits.ctrl.alu_fn, 4, 0) @[execution.scala 33:38]
    alu.io.func <= _alu_io_func_T @[execution.scala 33:14]
    ; Output queues and their dequeue-side wiring to the module ports.
    inst result of Queue @[execution.scala 34:20]
    result.clock <= clock
    result.reset <= reset
    inst result_br of Queue_1 @[execution.scala 35:23]
    result_br.clock <= clock
    result_br.reset <= reset
    io.out.bits <= result.io.deq.bits @[execution.scala 36:16]
    io.out.valid <= result.io.deq.valid @[execution.scala 36:16]
    result.io.deq.ready <= io.out.ready @[execution.scala 36:16]
    io.out2br.bits <= result_br.io.deq.bits @[execution.scala 37:19]
    io.out2br.valid <= result_br.io.deq.valid @[execution.scala 37:19]
    result_br.io.deq.ready <= io.out2br.ready @[execution.scala 37:19]
    ; All-zero defaults for the write-back payload. Every field is connected
    ; again right below, and those later connects take precedence.
    wire _result_io_enq_bits_WIRE : { wb_wxd_rd : UInt<32>, wxd : UInt<1>, reg_idxw : UInt<5>, warp_id : UInt<2>} @[execution.scala 38:35]
    _result_io_enq_bits_WIRE.warp_id <= UInt<2>("h0") @[execution.scala 38:35]
    _result_io_enq_bits_WIRE.reg_idxw <= UInt<5>("h0") @[execution.scala 38:35]
    _result_io_enq_bits_WIRE.wxd <= UInt<1>("h0") @[execution.scala 38:35]
    _result_io_enq_bits_WIRE.wb_wxd_rd <= UInt<32>("h0") @[execution.scala 38:35]
    result.io.enq.bits.warp_id <= _result_io_enq_bits_WIRE.warp_id @[execution.scala 38:21]
    result.io.enq.bits.reg_idxw <= _result_io_enq_bits_WIRE.reg_idxw @[execution.scala 38:21]
    result.io.enq.bits.wxd <= _result_io_enq_bits_WIRE.wxd @[execution.scala 38:21]
    result.io.enq.bits.wb_wxd_rd <= _result_io_enq_bits_WIRE.wb_wxd_rd @[execution.scala 38:21]
    ; Actual write-back payload.
    result.io.enq.bits.warp_id <= io.in.bits.ctrl.wid @[execution.scala 39:29]
    result.io.enq.bits.wb_wxd_rd <= alu.io.out @[execution.scala 40:31]
    result.io.enq.bits.reg_idxw <= io.in.bits.ctrl.reg_idxw @[execution.scala 41:30]
    result.io.enq.bits.wxd <= io.in.bits.ctrl.wxd @[execution.scala 42:25]
    ; Input ready, selected by ctrl.branch:
    ;   branch == 0 : only `result` must be ready
    ;   branch == 1 : only `result_br` must be ready
    ;   branch 2/3  : both queues must be ready
    node _io_in_ready_T = and(result_br.io.enq.ready, result.io.enq.ready) @[execution.scala 44:71]
    node _io_in_ready_T_1 = eq(UInt<2>("h1"), io.in.bits.ctrl.branch) @[Mux.scala 81:61]
    node _io_in_ready_T_2 = mux(_io_in_ready_T_1, result_br.io.enq.ready, _io_in_ready_T) @[Mux.scala 81:58]
    node _io_in_ready_T_3 = eq(UInt<2>("h0"), io.in.bits.ctrl.branch) @[Mux.scala 81:61]
    node _io_in_ready_T_4 = mux(_io_in_ready_T_3, result.io.enq.ready, _io_in_ready_T_2) @[Mux.scala 81:58]
    io.in.ready <= _io_in_ready_T_4 @[execution.scala 44:14]
    ; Branch payload: new_pc is taken from in3. jump is the ALU compare output
    ; for branch == 1, constant 1 for branch == 2 or 3, and 0 for branch == 0.
    result_br.io.enq.bits.wid <= io.in.bits.ctrl.wid @[execution.scala 46:28]
    result_br.io.enq.bits.new_pc <= io.in.bits.in3 @[execution.scala 47:31]
    node _result_br_io_enq_bits_jump_T = eq(UInt<2>("h1"), io.in.bits.ctrl.branch) @[Mux.scala 81:61]
    node _result_br_io_enq_bits_jump_T_1 = mux(_result_br_io_enq_bits_jump_T, alu.io.cmp_out, UInt<1>("h0")) @[Mux.scala 81:58]
    node _result_br_io_enq_bits_jump_T_2 = eq(UInt<2>("h2"), io.in.bits.ctrl.branch) @[Mux.scala 81:61]
    node _result_br_io_enq_bits_jump_T_3 = mux(_result_br_io_enq_bits_jump_T_2, UInt<1>("h1"), _result_br_io_enq_bits_jump_T_1) @[Mux.scala 81:58]
    node _result_br_io_enq_bits_jump_T_4 = eq(UInt<2>("h3"), io.in.bits.ctrl.branch) @[Mux.scala 81:61]
    node _result_br_io_enq_bits_jump_T_5 = mux(_result_br_io_enq_bits_jump_T_4, UInt<1>("h1"), _result_br_io_enq_bits_jump_T_3) @[Mux.scala 81:58]
    result_br.io.enq.bits.jump <= _result_br_io_enq_bits_jump_T_5 @[execution.scala 48:29]
    ; Enqueue valids are gated with the computed io.in.ready so that both
    ; queues only accept an entry in a cycle where the input is consumed.
    ; Without the gating, an input needing both queues (branch 2/3) while only
    ; one queue is ready would be enqueued into that queue, held at the input
    ; because io.in.ready is low, and enqueued again on the retry.
    ; This adds no combinational loop: in both queue modules enq.ready depends
    ; only on the occupancy flag and deq.ready, not on enq.valid.
    ; NOTE(review): for branch == 1 io.in.ready ignores result.io.enq.ready, so
    ; an input with branch == 1 and wxd == 1 can still miss its write-back if
    ; `result` is full. Presumably the decoder never sets wxd on such
    ; instructions; confirm against the decode table.
    node _result_br_io_enq_valid_T = neq(io.in.bits.ctrl.branch, UInt<2>("h0")) @[execution.scala 50:62]
    node _result_br_io_enq_valid_T_1 = and(io.in.valid, _result_br_io_enq_valid_T) @[execution.scala 50:38]
    node _result_br_io_enq_valid_T_2 = and(_result_br_io_enq_valid_T_1, _io_in_ready_T_4)
    result_br.io.enq.valid <= _result_br_io_enq_valid_T_2 @[execution.scala 50:25]
    node _result_io_enq_valid_T = and(io.in.valid, io.in.bits.ctrl.wxd) @[execution.scala 51:35]
    node _result_io_enq_valid_T_1 = and(_result_io_enq_valid_T, _io_in_ready_T_4)
    result.io.enq.valid <= _result_io_enq_valid_T_1 @[execution.scala 51:22]

